* ---------------------------------------------------------------------------
* Session set-up: empty memory, no screen paging, close any log left open,
* and interpret the commands below under Stata 14 rules
* ---------------------------------------------------------------------------
clear
set more off
capture log close
version 14

* ---------------------------------------------------------------------------
* Folder locations
*   d       Spending Study 1 data
*   ip9eul  Innovation Panel wave 9 data
*   j       intermediate files
*   do      do-files
* ---------------------------------------------------------------------------
global d      "D:\Home\aejack\Data\SpendingStudy\DataDocumentation\v2"
global ip9eul "D:\Home\aejack\Data\IP\IP1-9-EUL\stata13_se"
global j      "D:\Home\aejack\Research\NewTechnologies\TillReceiptScanning\junk2"
global do     "D:\Home\aejack\Research\NewTechnologies\TillReceiptScanning\do-files"

***************************************************************************
* Spending Study 1
***************************************************************************


* Derived variables for the app data
* ----------------------------------
* Load the app data file (apd), run the do-file cr_apd_dv from the same folder on it,
* and store the result, sorted by pidp, as an intermediate file for the merge further down.
use "$d/apd", clear
do "$d/cr_apd_dv"
sort pidp
save "$j/app_para_dvs", replace



* Innovation Panel wave 9 (IP9): individual-level extract
* -------------------------------------------------------
* Purpose of the extract:
*   - mobile device questions
*   - other predictors of participation in scanning
* The variables to retain are collected in locals (grouped loosely by name prefix)
* and handed to a single -keep-.

use "$ip9eul/i_indresp_ip", clear
duplicates report pidp

local id_vars pidp i_hidp i_psu i_strata

local sc_vars i_scsmtphone i_sctablet ///
	i_scwill5sp i_scwill5tb i_scwill2sp i_scwill2tb ///
	i_schomewifi i_sccontrtsp i_scfrqusesp i_scfrqusetb ///
	i_scphotosp i_scphototb i_scbuysp i_scbuytb ///
	i_scbanksp i_scbanktb i_scinstalsp i_scinstaltb ///
	i_scskillsp i_scskilltb i_scworry2 i_scworry5 ///
	i_scwrrymod1-i_scwrrymod6 i_scflagip9

local fin_vars i_finnow i_finbud* i_bankbal i_balhow* i_finsite* ///
	i_taxsa i_accountnt i_finhelp i_loyalty* i_fcacon1 i_fimngrs_dv

local other_vars i_indmode i_netpuse i_pdvage i_hiqual i_sex

keep `id_vars' `sc_vars' `fin_vars' `other_vars'

* Household expenditure variables come from the household response file.
* Several individuals can share one i_hidp, hence m:1; household records
* without a matching individual (_merge==2) are discarded.
local hh_vars i_hidp i_xpfood1_g3 i_xpfdout_g3 i_xpaltob_g3 ///
	i_xpduely i_xpelecy i_xpgasy i_xpoily i_xpsfly ///
	i_xphsdb i_xphsdct i_xphsdba

sort i_hidp
merge m:1 i_hidp using "$ip9eul/i_hhresp_ip", keepusing(`hh_vars')
drop if _merge == 2
drop _merge

sort pidp
save "$j/i_indresp", replace


* calculate item non-response rate in IP9 INDRESP file
*********************************************************
* For every respondent:
*   miss    = number of variables coded -9, -2 or -1
*   miss2   = number of variables coded -2 or -1
*   inelig  = number of variables coded -8 or -7
*   inr     = miss  / (number of variables - inelig)
*   inr2    = miss2 / (number of variables - inelig)
use $ip9eul/i_indresp_ip, clear
drop i_indendtime*  // string variable

* Denominator: number of variables in the file. r(k) is read here, BEFORE the three
* counters are generated; reading it afterwards would count miss, miss2 and inelig
* as if they were survey items and overstate the denominator by 3.
describe, short
return list
local base = r(k)

* The counters are never negative, so the later anycount(_all) calls cannot pick up
* an earlier counter as one of the negative codes being counted.
egen miss = anycount(_all), values(-9 -2 -1)
egen miss2 = anycount(_all), values(-2 -1)
egen inelig = anycount(_all), values(-8 -7)

gen inr = (miss/(`base' - inelig))
lab var inr "missing/DK/REF"
gen inr2 = (miss2/(`base' - inelig))
lab var inr2 "DK/REF"

* eyeball the first 100 cases
list miss inelig inr inr2 in 1/100

keep pidp inr inr2
sort pidp
save $j/inr, replace



* Spending Study 1 sample file
* ----------------------------
* Holds the scanning incentive treatments; check that pidp identifies rows uniquely.
use "$d/sct", clear
duplicates report pidp


* Add the Spending Study 1 registration survey data
* -------------------------------------------------
* download_dv is 1 for cases found in both the sample file and the registration
* survey (_merge==3) and 0 for every other case.
sort pidp
merge 1:1 pidp using "$d/reg"

generate download_dv = (_merge == 3)
label variable download_dv "whether downloaded app"
label define download_dv 0 "did not download app" 1 "downloaded app"
label values download_dv download_dv

tabulate _merge download_dv, missing
drop _merge



* Add the IP9 extract
* -------------------
sort pidp
merge 1:1 pidp using "$j/i_indresp"

* IP9 cases that are not part of the sample file are removed
drop if _merge == 2  // 68 cases in IP9 but not in sample file
* observations in sample file but not IP9: IP9 non-respondents
tabulate ip9outcome if _merge == 1, missing
drop _merge

* the intermediate file is no longer needed
erase "$j/i_indresp.dta"


* Add the item non-response rates
* -------------------------------
sort pidp
merge 1:1 pidp using "$j/inr"
drop if _merge == 2
tabulate _merge ip9outcome, missing
drop _merge

* variables from eow / eop or registration survey? drop - need starttime from app paradata
drop starttime

* the intermediate file is no longer needed
erase "$j/inr.dta"


* Add the app data
* ----------------
* 1:m merge: one row per pidp in memory, possibly several rows per pidp in the app file.
* everapp_dv is 1 where a sample member has matching app data (_merge==3), 0 otherwise.
sort pidp
merge 1:m pidp using "$j/app_para_dvs"

generate everapp_dv = (_merge == 3)
label variable everapp_dv "whether ever used app"
label define everapp_dv 0 "did not use app" 1 "used app"
label values everapp_dv everapp_dv
drop _merge

* the intermediate file is no longer needed
erase "$j/app_para_dvs.dta"



* clean/derive variables
**************************

* whether used app at least once every week for 5 consecutive weeks
*
* Working variables (all dropped at the end of this section):
*   tagwk      flags one row per pidp-week
*   tagwksum   numbers a person's tagged weeks 1, 2, 3, ... in week_dv order
*   seq        flags a tagged week whose week_dv is one more than the previous tagged week
*   consecwks  sum of seq over all rows of a person
* week5_dv is 1 only when consecwks equals 5.

* tagwk = 1 on the first row (by starttime) of each pidp-week, 0 on the remaining rows
* of that week, and missing where week_dv is missing
bysort pidp week_dv (starttime): gen tagwk = _n==1 if week_dv<.
sort pidp week_dv starttime
list starttime week_dv tagwk, sepby(pidp)
* running sum of tagwk within pidp. -if tagwk- is true for every non-zero value, so rows
* with tagwk==0 get a missing tagwksum, while rows with a missing tagwk are not excluded
* NOTE(review): confirm that rows with missing week_dv are meant to receive a value here
bysort pidp (week_dv tagwk): gen tagwksum = sum(tagwk) if tagwk
sort pidp week_dv starttime
list starttime week_dv tagwk tagwksum, sepby(pidp)
* include only the first five weeks of app use
* (a missing tagwksum also satisfies >5, but such rows already have tagwk==0)
replace tagwk=0 if tagwksum>5
* check whether these are consecutive

sort pidp tagwk tagwksum
list starttime week_dv tagwk tagwksum , sepby(pidp)
* within pidp the rows with tagwk==1 are now adjacent and ordered by tagwksum; seq = 1 when
* week_dv is exactly one more than on the preceding row. For the first tagged week the
* preceding row (if any) is not a tagged week, so its seq is overwritten below.
bysort pidp (tagwk tagwksum): gen seq = week_dv == week_dv[_n-1] + 1 if tagwk==1
* set seq=1 for first week:
* (the unconditional replace that follows assigns seq = 1 to every row with tagwksum==1,
*  so it also covers the rows changed by this line)
bysort pidp (tagwk tagwksum): replace seq = 1 if week_dv == week_dv[_n+1] - 1 & tagwksum==1
* set seq=1 if only one week:
replace seq = 1 if tagwksum==1

list starttime week_dv tagwk tagwksum seq, sepby(pidp)

* person-level total of seq, repeated on every row of the person
bysort pidp : egen consecwks = sum(seq)
list starttime week_dv tagwk tagwksum seq consecwks, sepby(pidp)

* create binary indicator whether used app at least once in 5 consecutive weeks
gen week5_dv = consecwks==5
tab consecwks week5_dv, miss
lab var week5_dv "used app 5 consecutive weeks"
lab def week5_dv 1 "used app 5 weeks" 0 "did not use app 5 weeks"
lab val week5_dv week5_dv

* remove the working variables
drop tagwk tagwksum seq consecwks


* IP9 outcome by interview mode
tabulate ip9outcome i_indmode

* Has a smartphone or a tablet
* ----------------------------
* Defined for everyone whose smartphone question is not coded -8.
* NOTE(review): a missing i_scsmtphone also passes the != -8 filter, so such cases
* are coded 0 rather than missing - confirm this is intended.
tab1 i_scsmtphone i_sctablet
generate smtab_dv = (i_scsmtphone == 1 | i_sctablet == 1) if i_scsmtphone != -8 // if not 'ineligible' (Q not asked)
label variable smtab_dv "has smartphone or tablet"
label define smtab_dv 0 "no" 1 "yes"
label values smtab_dv smtab_dv
tabulate smtab_dv, missing
tabulate smtab_dv


* Hypothetical willingness
* ------------------------
* Both indicators are defined only for people with a device (smtab_dv==1).
tabulate i_scwill5sp if i_scwill5sp != -8, nolabel
tabulate i_scwill5tb if i_scwill5tb != -8

* answer codes 1 or 2 on either device
generate willsptb_dv = (inlist(i_scwill5sp, 1, 2) | inlist(i_scwill5tb, 1, 2)) if smtab_dv == 1
label variable willsptb_dv "very/somewhat willing on SM or TB"
label define willsptb_dv 0 "no" 1 "yes"
label values willsptb_dv willsptb_dv


* answer codes 1, 2 or 3 on either device
generate willsptb2_dv = (inlist(i_scwill5sp, 1, 2, 3) | inlist(i_scwill5tb, 1, 2, 3)) if smtab_dv == 1
label variable willsptb2_dv "very/somewhat/a little willing on SM or TB"
label define willsptb2_dv 0 "no" 1 "yes"
label values willsptb2_dv willsptb2_dv


* Self-completion module
* ----------------------
* nosc_dv is 1 when the self-completion flag is 0, 3 or -7 and 0 otherwise;
* note that 1 stands for "no sc", i.e. the module was NOT completed.
tabulate i_indmode  // N=61 CATI respondents, N=69 refusals to CASI?
tabulate i_scflag
tabulate i_scflag, nolabel
generate nosc_dv = inlist(i_scflag, 0, 3, -7)
label variable nosc_dv "whether completed self-completion"
label define nosc_dv 1 "no sc" 0 "yes sc"
label values nosc_dv nosc_dv
tabulate i_scflag nosc_dv, missing

* Internet use
* ------------
* internet_dv: 1 for frequency codes 1 to 5, 0 for everything else (missing included)
tabulate i_netpuse
tabulate i_netpuse, nolabel
generate internet_dv = inrange(i_netpuse, 1, 5)
label variable internet_dv "whether uses internet"
label define internet_dv 0 "does not use internet" 1 "uses internet"
label values internet_dv internet_dv
tabulate i_netpuse internet_dv, missing

* internet: four-category frequency. Code 3 is listed explicitly; recode would pass
* an unlisted 3 through unchanged anyway, giving the same result.
recode i_netpuse ///
	(1 = 1 "every day") ///
	(2 = 2 "several times a week") ///
	(3 4 5 = 3 "several times a month or less") ///
	(min/0 6 7 = 4 "never/no access") ///
	, generate(internet)
tabulate i_netpuse internet, missing

* Age in six bands; negative codes become missing
tabulate i_pdvage
recode i_pdvage ///
	(16/30  = 1 "16-30") ///
	(31/40  = 2 "31-40") ///
	(41/50  = 3 "41-50") ///
	(51/60  = 4 "51-60") ///
	(61/70  = 5 "61-70") ///
	(71/max = 6 "71+") ///
	(min/-1 = .) ///
	, generate(age_dv)
label variable age_dv "age banded"
tabulate i_pdvage age_dv, missing

* Income: code -8 is turned into missing, the variable is otherwise left as it is
tabulate i_fimngrs_dv, missing
mvdecode i_fimngrs_dv, mv(-8)

* FCA consent: 1 = yes, 2 = no/DK/REF, -7 becomes missing.
* Code 2 is listed explicitly; recode would pass an unlisted 2 through unchanged anyway.
tabulate i_fcacon1
recode i_fcacon1 ///
	(-8 -2 -1 2 = 2 "no/DK/REF") ///
	(1 = 1 "yes") ///
	(-7 = .) ///
	, generate(fca)
tabulate i_fcacon1 fca, missing

* Household food expenditure
* --------------------------
* Negative codes (-9 to -1) are set to missing first; hhfood is the sum of the two
* components and is therefore missing whenever either component is missing.
mvdecode i_xpfood1_g3 i_xpfdout_g3, mv(-9/-1)
generate hhfood = i_xpfood1_g3 + i_xpfdout_g3
* alternative (not used): fall back on the single available component
*replace hhfood = i_xpfood1_g3 if i_xpfood1_g3<. & i_xpfdout_g3==.
*replace hhfood = i_xpfdout_g3 if i_xpfdout_g3<. & i_xpfood1_g3==.
list i_xpfood1_g3 i_xpfdout_g3 hhfood in 1/100
list i_xpfood1_g3 i_xpfdout_g3 hhfood if i_xpfood1_g3 == . | i_xpfdout_g3 == .
label variable hhfood "hh expenditure on food in last 4 weeks"

* Household utilities expenditure
* -------------------------------
* Code -8 is set to missing. utilities starts at 0 and accumulates every component that
* holds a valid (non-negative, non-missing) amount; if any component still carries a
* negative code the total is replaced by -1 (dk/refused).
* NOTE(review): a household with no valid component and no negative code ends up
* with utilities = 0 - confirm this is intended.
mvdecode i_xpduely i_xpelecy i_xpgasy i_xpoily i_xpsfly, mv(-8)
generate utilities = 0
foreach fuel of varlist i_xpduely i_xpelecy i_xpgasy i_xpoily i_xpsfly {
	replace utilities = utilities + `fuel' if `fuel' >= 0 & `fuel' < .
}
replace utilities = -1 if (i_xpduely < 0 | i_xpelecy < 0 | i_xpgasy < 0 | i_xpoily < 0 | i_xpsfly < 0)
label define utilities -1 "dk/refused"
label values utilities utilities
label variable utilities "annual HH spend on fuel"
list i_xpduely - i_xpsfly utilities

* Problems paying bills
* ---------------------
* bills starts from i_xphsdba (2 or 3 -> 1 "behind", 1 -> 2 "up to date", negative
* codes -> missing) and is then set to 1 whenever i_xphsdb or i_xphsdct equals 1.
label list i_xphsdb i_xphsdct i_xphsdba
tab1 i_xphsdb i_xphsdct i_xphsdba
* alternative (not used): keep dk/refused as its own category
*recode i_xphsdba (2 3 =1 "behind paying bills") (1=2 "up to date paying bills") (min/-1=-1 "dk/refused"), gen(bills)
recode i_xphsdba ///
	(2 3 = 1 "behind paying bills") ///
	(1 = 2 "up to date paying bills") ///
	(min/-1 = .) ///
	, generate(bills)
tabulate i_xphsdba bills, missing

replace bills = 1 if i_xphsdb == 1
tabulate i_xphsdb bills, missing

tabulate i_xphsdct bills
replace bills = 1 if i_xphsdct == 1

* distribution over all rows, then over one row per pidp
tabulate bills
bysort pidp: generate tag = (_n == 1)
tabulate bills if tag
*recode bills -1=2
drop tag

* How well getting by financially
* -------------------------------
* Recoded in place: codes 3, 4 and 5 are merged into 3, negative codes become missing
recode i_finnow (3 4 5 = 3) (min/-1 = .)


* Highest qualification in three groups
* -------------------------------------
tabulate i_hiqual_dv
label list i_hiqual_dv
recode i_hiqual_dv ///
	(1 = 1 "degree") ///
	(2 3 4 = 2 "GCSE, A-level, other higher") ///
	(-9 5 9 = 3 "other, none or missing") ///
	, generate(qual)
tabulate i_hiqual_dv qual, missing
tabulate qual


* whether has a device - including reasons for missing info
*
* smtabmiss combines device ownership with the reason why ownership is unknown:
*    1  smartphone or tablet reported
*    0  at least one device question has a non-negative answer, and no device reported
*   -1  internet use code 6 or 7
*   -2  self-completion flag 0
*   -3  self-completion flag 3
*   -4  both device questions carry a negative code
* The reason codes are assigned in that order, each only where nothing has been set yet.
gen smtabmiss = .
replace smtabmiss = -1 if (i_netpuse==6 | i_netpuse==7)
replace smtabmiss = -2 if (i_scflag==0 & smtabmiss==.)
replace smtabmiss = -3 if (i_scflag==3 & smtabmiss==.)
replace smtabmiss = -4 if (i_scsmtphone<0 & i_sctablet<0 & smtabmiss==.)
* a reported device overrides any reason code assigned above
replace smtabmiss = 1 if (i_scsmtphone==1 | i_sctablet==1)
replace smtabmiss = 0 if ((i_scsmtphone>=0 & i_scsmtphone<.) | (i_sctablet>=0 & i_sctablet<.)) & smtabmiss==.
* label for -3 corrected from the misspelt "CASI ndeclined"
lab def smtabmiss 0 "no device" 1 "mobile device" ///
	-1 "never uses internet" -2 "CATI: no self-completion" -3 "CASI declined" -4 "SP and TB Q missing", modify
lab val smtabmiss smtabmiss
lab var smtabmiss "whether mobile device status known"
tab smtabmiss smtab_dv, miss


* store the finished file, sorted by pidp, in the intermediate folder
sort pidp
save "$j/cr-7", replace

          


